# -*- coding:utf-8 -*-
# @Time : 2021/2/25 18:10
# @Author : Administrator
# @File : novel.py
# @Software: PyCharm
# @Motto: good good study,day day up

import multiprocessing
import os
from pathlib import Path
from urllib.parse import urljoin

import requests
from loguru import logger
from parsel import Selector
from tqdm import tqdm


def get_response(url, timeout=10):
    """Fetch ``url`` with a desktop-browser User-Agent and return the response.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server (connecting, and between
            bytes received) before ``requests`` gives up. Without a timeout
            a stalled server would block the caller indefinitely.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked here.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # NOTE(review): response.encoding is left as inferred by requests; assign
    # response.apparent_encoding to it here if pages decode as mojibake.
    return requests.get(url=url, headers=headers, timeout=timeout)


# Characters that are reserved in Windows file names; '/' is also the POSIX
# path separator. Each one is mapped to '_' when building paths.
_UNSAFE_NAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _safe_name(name):
    """Return ``name`` with path separators and reserved characters replaced by ``_``."""
    return name.translate(_UNSAFE_NAME_CHARS)


def save(novel_name, title, content):
    """Write one chapter to ``<cwd>/../novel/<novel_name>/<title>.txt``.

    The file holds the title on the first line followed by the content.
    An existing chapter file is left untouched, so finished chapters are
    skipped when the script is run again.

    Args:
        novel_name: Name of the novel; used as the directory name.
        title: Chapter title; used as the file name and as the first line.
        content: Chapter text.

    Raises:
        TypeError: If ``novel_name`` or ``title`` is ``None``. This is
            checked before anything is created on disk so that no empty
            placeholder file is left behind.
    """
    if novel_name is None or title is None:
        raise TypeError('novel_name and title must be strings, not None')

    # Sanitise only the path components; the file body keeps the real title.
    chapter_dir = Path.cwd().parent / 'novel' / _safe_name(novel_name)
    chapter_dir.mkdir(parents=True, exist_ok=True)
    chapter_file = chapter_dir / f'{_safe_name(title)}.txt'

    try:
        # 'x' creates the file exclusively, so the existence check and the
        # creation happen in a single step.
        with open(chapter_file, mode='x', encoding='utf-8') as f:
            f.write(title + '\n' + content)
    except FileExistsError:
        # Chapter was already saved earlier; keep the existing file.
        return


def get_one_novel(novel_name, novel_url):
    """Download a single chapter page and save its title and text.

    Args:
        novel_name: Name of the novel, passed through to ``save``.
        novel_url: URL of the chapter page.

    The title is read from ``div.bookname > h1`` and the body from the text
    nodes directly under ``div#content``. A page without a title is logged
    and skipped rather than passed to ``save``.
    """
    response = get_response(novel_url)
    selector = Selector(response.text)

    title = selector.xpath('//div[@class="bookname"]/h1/text()').get()
    if title is None:
        # .get() returns None when the XPath matches nothing (error page,
        # changed layout, ...); there is nothing meaningful to save.
        logger.warning('No chapter title found at {}; skipping', novel_url)
        return

    content_str = ''.join(selector.xpath('//div[@id="content"]/text()').getall())
    save(novel_name, title, content_str)


def get_all_url(url):
    """Download every chapter linked from a novel's index page.

    Args:
        url: URL of the novel's index (table of contents) page.

    The novel name is read from ``div#info > h1`` and the chapter links from
    the anchors inside ``div#list``. Each link is resolved against the URL of
    the fetched index page, so relative and absolute hrefs both work and the
    chapters are requested from the same site as the index. A chapter whose
    request fails is logged and the remaining chapters are still downloaded.
    """
    response = get_response(url)
    selector = Selector(response.text)

    novel_name = selector.xpath('//div[@id="info"]/h1/text()').get()
    if novel_name is None:
        logger.error('No novel name found at {}; nothing downloaded', url)
        return

    chapter_hrefs = selector.xpath('//div[@id="list"]//dl//dd/a/@href').getall()
    for href in tqdm(chapter_hrefs):
        # response.url is the final URL after any redirects of the index page.
        chapter_url = urljoin(response.url, href)
        try:
            get_one_novel(novel_name, chapter_url)
        except requests.RequestException:
            # One unreachable chapter should not abort the whole download.
            logger.exception('Failed to download {}', chapter_url)


if __name__ == '__main__':
    # Script entry point: download the novel whose index page is given below.
    index_url = 'http://www.mayiwxw.com/54_54181/index.html'
    get_all_url(index_url)
